

% Hix Hortala-Vallve-Riambau ("The Effects of District Magnitude on Voting Behavior", Journal of Politics)
% HHVR_2016_Master_code.m
% This code serves to find the main results for Table 4. 

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%       For purposes of pure replication of Table 4 only, please read this.
%
%       1. First, the code will ask the user to choose between a subsample or the
%       full sample: press '4', in order to choose the full sample.
%              'Which sample would you like?' [Answer:] 4
%
%       2. Once running the code, it will ask the user to choose the District Magnitude: 
%       choose 1,2,3 or 0 (for Proportional)
%                'Which DM size would you like (Proportional=0)?' [Answer:] 0,1,2 or 3
% 
%       3. Then it will ask whether to get rid of individuals whose
%       strategies we cannot rationalize: press '0' in order to keep ALL
%       observations
%                'Want a dataset with no "other" type votes? press 1, 0 othwerwise' [Answer:] 0 
%
%       Then let the code run. The variable of interest is "PI"
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% This code has to be run in conjunction with:
%        - The data ("HHVR_DATA.mat") [or "HHVR_DATA_*randommax" if a particular subset is chosen. See description of the four datasets below.]
%        - The file that runs the conditional logit: HHVR_clogit_3_types.m
%        - The files that run the EM algorithm: HHVR_EM1a_3_types.m and HHVR_EM1b_3_types.m
%        - The file that computes the predicted probabilities and assesses
%        the model: HHVR_prediction.m
%        - NOTE: For DM=Proportional, the files that this code calls are the following: HHVR_clogit_2_types.m, HHVR_EM1a_2_types.m, 
%       HHVR_EM1b_2_types.m and HHVR_prediction_2t.m 


% Note on the dataset: The dataset that this document loads up includes
% rounds 2,3,4 and 5 for each election, since round 1 is not needed for the
% analysis. (Rounds are column 15 when first loading the dataset)

% For table 4, the results are given by the 3x1 vector "PI": PI = [% strategic; % sincere; % Frontrunner]


% Start from an empty workspace and declare the variables that the
% estimation routines share through the global workspace.
clear all
global DATA choices y y_short X M individuals 


% Description of the datasets:

%--%%--%%--%%--%
%   Sample 1	gets rid of all participants who in at least 50% of the first rounds did not vote for their favorite party
% 	92.45% of original sample is kept
%
%   Sample 2	gets rid of all participants who in at least 25% of the first rounds did not vote for their favorite party
% 	75.94% of original sample is kept
%
%   Sample 3	gets rid of all participants who in at least 10% of the first rounds did not vote for their favorite party
% 	48.11% of original sample is kept
%
%   Sample 4 is the original full sample dataset
%--%%--%%--%%--%


% Ask which sample to use and load the matching .mat file.
disp 'Which sample would you like?'
sample = ' Choose now, please: 1, 2, 3 or 4(=full):...' ; 
choice_sample = input(sample)

% Reject anything that is not exactly 1, 2, 3 or 4. Without this check an
% invalid answer loads nothing and the script fails further down with an
% unrelated "undefined variable" error.
if ~(isnumeric(choice_sample) && isscalar(choice_sample) && any(choice_sample == 1:4))
    disp 'ERROR: please type 1, 2, 3 or 4'
    return
end

% File names indexed by the sample number chosen above.
sample_files = {'HHVR_DATA_50randommax', ...
                'HHVR_DATA_25randommax', ...
                'HHVR_DATA_10randommax', ...
                'HHVR_DATA'};
load(sample_files{choice_sample})
clear sample_files

%--%%--%%--%%--%
%  The variables are:  id - vote - Expected utility - Utility -  [5]winner previous election - runnerup previous election - third previous election - fourth previous election  - fifth previous election -  [10]Vote Share -  Margin of victory -  Disjoint sample (or not) - District Magnitude -  Party -  Round -  Period - Effective # parties  - MARGIN VOTE previous ROUND  
%--%%--%%--%%--%
 
% Number of alternatives (rows) per voting observation.
choices = 5;
% valid_rounds = 48;

% Named copies of individual columns of the raw matrix "data".
round                 = data(:,15);
period                = data(:,16);
M                     = data(:,13);
party                 = data(:,14);
disjoint              = data(:,12);
margin_vote           = data(:,18);
effective_num_parties = data(:,17);
% NOTE(review): "round" hides MATLAB's built-in round() in this workspace.
% NOTE(review): the global M keeps the row order of the raw file; it is not
% reordered or subset together with DATA below -- confirm that is intended.

%--%%--%%--%%--%
% Working matrix: raw columns 1-10 followed by
%   11 = M, 12 = party, 13 = disjoint, 14 = round, 15 = period,
%   16 = margin_vote, 17 = effective_num_parties
DATA_ = data(:, [1:10, 13, 14, 12, 15, 16, 18, 17]);
%--%%--%%--%%--%

% Sort the rows by District Magnitude (column 11) to facilitate the
% analysis by District Magnitude; every column is reordered with the
% permutation returned by sort().
[sorted_M, row_order] = sort(DATA_(:,11));
DATA = DATA_(row_order, :);
clear sorted_M row_order DATA_
 
% Choosing the particular subsample (by district magnitude).
% After sorting, the Proportional observations occupy the rows before the
% M=1 block; the first row of each remaining block is listed below.
% SAMPLE 1:
% M=1 starts at i=10,081
% M=2 starts at i=20,641
% M=3 starts at i=31,921
% SAMPLE 2:
% M=1 starts at i=8,881
% M=2 starts at i=18,241
% M=3 starts at i=25,921
% SAMPLE 3:
% M=1 starts at i=5,281
% M=2 starts at i=11,041
% M=3 starts at i=16,561
% FULL SAMPLE:
% M=1 starts at i=10,561
% M=2 starts at i=22,081
% M=3 starts at i=34,081
 

% Ask for the district magnitude and keep only the matching rows of DATA.
disp 'Which DM size would you like (Proportional=0)?'
district_magnitude = ' Choose now, please ' ; 
choice = input(district_magnitude)

% Stop the script on anything other than 0, 1, 2 or 3. "return" is used
% because "break" is only valid inside a for/while loop.
if ~(isnumeric(choice) && isscalar(choice) && any(choice == 0:3))
    disp 'ERROR: please type 0, 1, 2 or 3'
    return
end

% First row of every district-magnitude block in the sorted DATA matrix.
% Rows: sample 1..4 (choice_sample). Columns: Proportional, DM=1, DM=2, DM=3.
first_row = [ 1  10081  20641  31921 ; ...
              1   8881  18241  25921 ; ...
              1   5281  11041  16561 ; ...
              1  10561  22081  34081 ];

block_start = first_row(choice_sample, choice + 1);
if choice == 3
    % The DM=3 block runs to the end of the matrix.
    block_stop = size(DATA,1);
else
    % Every other block ends right before the next block starts.
    block_stop = first_row(choice_sample, choice + 2) - 1;
end

if choice == 0
    DM0=1; %dummy that denotes that we have a PR system
    disp 'Dataset chosen to be Proportional'
else
    disp(['Dataset chosen to be DM=' num2str(choice)])
end

DATA = DATA(block_start:block_stop, :);
clear first_row block_start block_stop
 
 % DM = 0 flags the Proportional (PR) treatment, DM = 10 any other one.
 % The flag follows the user's answer directly; inferring it from the
 % number of rows only recognised the PR block of sample 1.
 DM = 10;
 if choice == 0
     DM = 0;
 end
 
 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Next: creation of the data set for the disjoint sample only
% Data_disjoint = [] ;
% for i = 1 : size(DATA,1)
%    if DATA(i,13)==1
%        Data_disjoint = [Data_disjoint;  DATA(i,:)];
%    end
% end
% DATA = Data_disjoint;
% [end] creation the data set for the disjoint sample only
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


% creating a 'short' vector of dependent variable y:
% one entry per observation holding the position (1..choices) of the
% alternative whose vote indicator equals 1, or 0 if there is none.
y = DATA(:,2);
y_ = reshape(y,choices,size(y,1)/choices);
y_short = zeros(size(y,1)/choices,1);
for i = 1 : size(y_,2)
    chosen = find(y_(:,i) == 1, 1, 'last');
    if ~isempty(chosen)
        y_short(i) = chosen;
    end
end
clear chosen

individuals = size(DATA,1)/choices;

% Regressors: columns 3-5 of DATA followed by four columns of ones (the
% constants for the regression).
% For the PR treatment Expected Utility and Utility are the same, so one
% column has to go -- but the 2-types estimation branch further down
% already removes the second column of X itself. X is therefore always
% built with all seven columns here; dropping a column at this point as
% well would remove two columns in total and leave fewer regressors than
% the 6-element initial guess used in that branch.
X = [DATA(:,3:5)  ones(size(DATA,1),4)];
 
% Note: "other" types are individuals who voted for a party that was
% neither her favorite (=most preferred), nor the one that gave her the
% highest expected utility, nor the one which won the previous round.

% Ask whether those observations should be removed from the dataset.
disp 'want a dataset with no "other" type votes? press 1, 0 othwerwise'
no_other_vote_dataset = '"choose" ' ; 
choice2 = input(no_other_vote_dataset)
if choice2 == 1
    disp 'Dataset chosen to be have no "other" votes'
elseif choice2 ==0
    disp 'Full dataset chosen, including "other" votes'
else
    disp 'ERROR: please type 0 or 1'
    % "return" stops the script; "break" is only valid inside a loop.
    return
end

if choice2 == 1 
    % RationalizableVotes is defined in another file; its first output
    % replaces DATA, and everything that depends on the number of rows is
    % rebuilt from the filtered matrix.
    [dataset_noothervote, consistent] = RationalizableVotes( DATA );
    DATA = dataset_noothervote;
    individuals = size(DATA,1)/choices;
    % Columns 3-5 of DATA followed by four columns of ones (constants).
    X = [DATA(:,3:5)  ones(size(DATA,1),4)];
end


% Vote indicators (column 2 of DATA) arranged as one choices-by-1 page per
% observation.
ywillnew = reshape(DATA(:,2), choices, 1, size(DATA,1)/choices);

% For every observation, the position of the alternative voted for
% (last row whose indicator equals 1); 0 when no indicator equals 1.
ywill_ = zeros(size(DATA,1)/choices,1);
for i = 1 : size(ywillnew,3)
    voted = find(ywillnew(:,1,i) == 1, 1, 'last');
    if ~isempty(voted)
        ywill_(i) = voted;
    end
end

% 'Short' dependent variable: y is the stacked vote indicator, y_ holds one
% column per observation, and y_short stores the position of the chosen
% alternative (0 when none is marked).
y = DATA(:,2);
y_ = reshape(y, choices, size(y,1)/choices);
y_short = zeros(size(y,1)/choices,1);
for i = 1 : size(y_,2)
    voted = find(y_(:,i) == 1, 1, 'last');
    if ~isempty(voted)
        y_short(i) = voted;
    end
end
clear voted
 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%   Estimation: conditional logit first, then the EM algorithm.
%   choice == 0 (Proportional) uses the 2-types routines,
%   choice  > 0 (DM = 1, 2, 3)  uses the 3-types routines.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Solver settings shared by both branches.
solver_options = optimset('maxiter',10000, 'MaxFunEvals',10000 );

if choice==0
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    %                EM algorithm for 2 types (i.e. DM=0)
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

    % Remove the second column of X before estimating.
    X = X(:, [1, 3:size(X,2)]);

    % Conditional logit provides the starting values for the EM step.
    initial_guess = ones(6,1);
    clogit_objective = @(bbeta) HHVR_clogit_2_types(bbeta );
    [parameters_ML] = fsolve(clogit_objective, initial_guess, solver_options);

    % EM algorithm started from the conditional-logit solution.
    initial_guess = parameters_ML;
    [parameters_EM, z, Z, LOGLIK, loglik, PI] = HHVR_EM1a_2_types(initial_guess );
    parameters_2types = [parameters_ML parameters_EM];

    % Share of correctly predicted votes (displayed on purpose).
    [percent_correct_ML, percent_correct_EM] = HHVR_prediction_2t(  parameters_ML,  parameters_EM, z )

elseif choice>0
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    %                    EM algorithm for 3 types
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

    % Conditional logit provides the starting values for the EM step;
    % the solve is timed and the elapsed time displayed.
    initial_guess = ones(7,1);
    clogit_objective = @(bbeta) HHVR_clogit_3_types(bbeta );
    tic
    [parameters_ML] = fsolve(clogit_objective, initial_guess, solver_options);
    time_solve_Clogit = toc

    % EM algorithm started from the conditional-logit solution.
    initial_guess = parameters_ML;
    [parameters_EM, z, Z, LogLik, loglik, PI, total_vote_type] = HHVR_EM1a_3_types(initial_guess );
    parameters_3types = [parameters_ML parameters_EM];

    % Share of correctly predicted votes (displayed on purpose).
    [percent_correct_ML, percent_correct_EM] = HHVR_prediction(  parameters_ML,  parameters_EM, z )

end
clear solver_options clogit_objective